############### ###############
## cbo_regression2.R
## Project: CBO
## Author: Kamil Kouhen
## Date of creation: 01/05/2022
############### ###############

# Function that estimates a model from string arguments and exports it in a standardized form usable with stargazer
# (the code is very close to cbo_regression1, but the output is different).
# Use startable to obtain the stargazer output; cbo_regression2 itself produces a homogenized coeftest output.

#' Estimate a model described by strings and store its robust coeftest output
#'
#' Builds a model call as text from the arguments, fits it once, computes a
#' heteroskedasticity-robust (or cluster-robust) covariance matrix and wraps
#' both in `coeftest()`. The generated code is evaluated in the caller's
#' environment, so a data frame may have the same name as one of this
#' function's arguments (e.g. a data frame called `df`).
#'
#' `coeftest()`, `vcovHC()`, `vcovCL()`, `%>%` and `filter()` are called
#' unqualified, so the packages providing them (presumably lmtest, sandwich and
#' dplyr) must already be attached by the caller.
#'
#' Side effects: the coeftest object is stored under the name given in
#' `modelname`, and (if `control.mean.yn` is TRUE) the control group mean is
#' stored as `control.mean.<modelname>`. Both use the same lookup rule as
#' `<<-`: an existing binding in an enclosing environment is overwritten,
#' otherwise the object is created in the global environment.
#'
#' @param modelname Name (single string) under which the result is stored.
#' @param df Name (string) of the data frame to use.
#' @param specification One of "OLS", "LOGIT", "Negative Binomials", "TOBIT".
#' @param outcomevar Name (string) of the outcome variable.
#' @param treatmentvar Character vector of treatment variable names/terms.
#' @param ind.vars Optional character vector of additional covariates.
#' @param FE Optional character vector of fixed-effect terms.
#' @param stde `type` passed to `vcovHC()` / `vcovCL()` (default "HC0").
#' @param clustered.sd Optional name (string) of the column of `df` to cluster on.
#' @param weights Optional weights, given as the text to put after `weights =`.
#' @param standardized.outcome If TRUE the outcome is wrapped in `scale()`.
#' @param control.mean.yn If TRUE the control group mean is computed and stored.
#' @param control.mean.what.treatment Treatment variable (string); rows where it
#'   equals 0 form the control group. Required when `control.mean.yn` is TRUE.
#' @param display.results If TRUE the coeftest object is returned visibly.
#' @param tobit.lower,tobit.upper Optional censoring limits passed as `left` /
#'   `right` to `AER::tobit`; when NULL the argument is left out of the call.
#' @return The coeftest object when `display.results` is TRUE; otherwise the
#'   information message that is printed (returned invisibly by `print()`).
cbo_regression2 <- function(modelname = "", #Name your model (e.g. "lm1")
                            df = "",
                            specification = "OLS",
                            outcomevar = "",
                            treatmentvar,
                            ind.vars = NULL,
                            FE = NULL,
                            stde = "HC0", #Standard error type (default is HC0)
                            clustered.sd = NULL,
                            weights = NULL,
                            standardized.outcome = FALSE,
                            control.mean.yn = FALSE, #Create control group mean and put in environment? (as control.mean.modelname)
                            control.mean.what.treatment = NULL, #Specify treatment variable used to identify control group
                            display.results = TRUE, #Show regression table (from coeftest so with robust stde)?
                            tobit.lower = NULL, #Only for tobit models
                            tobit.upper = NULL){ #Only for tobit models

  ## Argument checks ##
  if (missing(modelname) || is.null(modelname) || !is.character(modelname) ||
      length(modelname) != 1 || !nzchar(modelname)){
    stop("Error: Please specify a model name before continuing.")
  }

  if (missing(treatmentvar) || is.null(treatmentvar)){
    stop("Please specify at least one treatment variable (treatmentvar).")
  }

  #Checking first that specification exists
  if (length(specification) != 1 ||
      !specification %in% c("OLS", "LOGIT", "Negative Binomials", "TOBIT")){
    stop("The model specified does not exist, please check the source code to see what model is currently available (OLS, LOGIT etc.)")
  }

  ## Estimation function and specification-specific arguments ##
  spec <- switch(specification,
                 "OLS" = "lm",
                 "LOGIT" = "glm",
                 "Negative Binomials" = "MASS::glm.nb",
                 "TOBIT" = "AER::tobit")

  addon <- ""
  if (specification == "LOGIT"){
    addon <- ", family = 'binomial'"
  }
  if (specification == "TOBIT"){
    addon <- ", dist = 'gaussian'"
    #Only pass the limits that were given, otherwise the tobit defaults apply
    if (!is.null(tobit.lower)){
      addon <- paste0(addon, ", left = ", tobit.lower)
    }
    if (!is.null(tobit.upper)){
      addon <- paste0(addon, ", right = ", tobit.upper)
    }
  }

  #Generated code is evaluated where the function was called from, so that the
  #names it contains are not shadowed by the arguments of this function
  caller_env <- parent.frame()
  enclosing_env <- parent.env(environment())

  #Same lookup rule as `<<-`: overwrite an existing binding in an enclosing
  #environment, otherwise create the object in the global environment
  save_output <- function(name, value){
    assign(name, value, envir = enclosing_env, inherits = TRUE)
  }

  ## Control group mean ##
  if (isTRUE(control.mean.yn)){
    if (missing(control.mean.what.treatment) || is.null(control.mean.what.treatment)){
      stop("Please specify control.mean.what.treatment in order to calculate control group mean: treatment variable used to identify control group.")
    }

    control_cmd <- paste0("(", df, " %>% filter(", control.mean.what.treatment, " == 0))$", outcomevar)
    control_outcome <- eval(parse(text = control_cmd), envir = caller_env)

    #NOTE(review): the outcome is standardized within the control group only,
    #so this mean is (numerically) 0 by construction, whereas the regression
    #standardizes over the estimation sample - confirm this is intended
    if (isTRUE(standardized.outcome)){
      control_outcome <- scale(control_outcome)
    }

    save_output(paste0("control.mean.", modelname),
                round(mean(control_outcome, na.rm = TRUE), digits = 2)) #Creating output: control group mean
  }

  ## Building the model call ##

  #c() drops the NULL (unspecified) groups, so no dangling "+" is produced, and
  #collapsing the vector leaves commas inside a term (e.g. "poly(x, 2)") alone
  rhs <- paste(c(treatmentvar, ind.vars, FE), collapse = " + ")

  lhs <- outcomevar
  if (isTRUE(standardized.outcome)){
    lhs <- paste0("scale(", outcomevar, ")")
  }

  wgts <- ""
  if (!is.null(weights)){
    wgts <- paste0(", weights = ", weights)
  }

  model_cmd <- paste0(spec, "(", lhs, " ~ ", rhs, ", data = ", df,
                      ", na.action = na.omit", wgts, addon, ")")

  ## Creating estimations and output ##

  #The model is estimated once and reused for the covariance matrix
  fit <- eval(parse(text = model_cmd), envir = caller_env)

  if (is.null(clustered.sd)){
    #Non-clustered se
    robust_vcov <- vcovHC(fit, type = stde)
  } else {
    #Clustered se
    cluster_cmd <- paste0("factor(", df, "$", clustered.sd, ")")
    cluster_factor <- eval(parse(text = cluster_cmd), envir = caller_env)
    robust_vcov <- vcovCL(fit, cluster = cluster_factor, type = stde)
  }

  coeft <- coeftest(fit, vcov. = robust_vcov)

  #Outputs
  save_output(modelname, coeft) #Creating output: model

  if (isTRUE(display.results)){ #Display regression output from coeftest or not
    return(coeft)
  }

  print("Output should be found in environment. modelname is normal regression output (with robust se). If specified with control.mean.yn, control group mean is saved as control.mean.modelname")

} #End of cbo_regression2
